import matplotlib.pyplot as plt
import numpy as np
import h5py
import seaborn as sns
import pandas as pd
import scipy.stats as stats
import matplotlib as mpl
from scipy.signal import hilbert
import statsmodels.api as sm
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import OPTICS
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder,StandardScaler, MinMaxScaler
from imblearn.over_sampling import SVMSMOTE
from sklearn.svm import SVC
import tensorflow.keras.layers as layers
from tensorflow.keras import Model
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras import callbacks
from tensorflow.keras.models import load_model
from imblearn import under_sampling
from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix
from keras.utils import to_categorical
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
%matplotlib inline
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['xtick.labelsize'] = 15
plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['legend.fontsize'] = 20
plt.rcParams['figure.titlesize'] = 26
plt.rcParams['xtick.major.size'] = 10
plt.rcParams['xtick.major.width'] = 1
plt.rcParams['ytick.major.size'] = 10
plt.rcParams['ytick.major.width'] = 1
plt.rcParams['xtick.minor.width'] = 1
plt.rcParams['ytick.minor.size'] = 5
plt.rcParams['ytick.minor.width'] = 1
plt.rcParams['xtick.minor.size'] = 5
plt.rcParams['figure.figsize'] = 10,7
sns.set_style('ticks')
mpl.style.use('fivethirtyeight')
# Sample frequency
f_sample = 12e3
# Axis rotation frequency
f_rotation = 28.68
bpfo_multiplier = 3.5848
f_bpfo = f_rotation * bpfo_multiplier
f_bpfo_harmonics = f_bpfo*(1+np.arange(20))
print("Expected BPFO first frequency " + str(f_bpfo) + " Hz")
print("Expected BPFO harmonic frequencies " + str(f_bpfo_harmonics) + " Hz")
hf = h5py.File("x_baseline.h5", "r")
x_baseline = np.array(hf.get("x_baseline"))
hf = h5py.File("x_fault.h5", "r")
x_fault = np.array(hf.get("x_fault"))
print(f' Mean Baseline : {x_baseline.ravel().mean().round(3)}')
print(f' STD Baseline : {x_baseline.ravel().std().round(3)}')
print(f' Mean Faulty : {x_fault.ravel().mean().round(3)}')
print(f' STD Faulty : {x_fault.ravel().std().round(3)}')
fig,ax = plt.subplots(figsize=(16,28))
fig.subplots_adjust(hspace=0.5)
plt.subplot(5,2,1)
sns.distplot(x_baseline.ravel(), label='Baseline')
sns.distplot(x_fault.ravel(),label='Faulty')
plt.xlim([-5,5])
plt.ylabel('Population')
plt.title('Distribution of Complete Data',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(x_baseline.ravel()):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(x_baseline.ravel()):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(x_fault.ravel()):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(x_fault.ravel()):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)
plt.subplot(5,2,2)
sns.distplot(x_baseline[0], label='Baseline')
sns.distplot(x_fault[0],label='Faulty')
plt.ylabel('Population')
plt.xlim([-5,5])
plt.title('Distribution in the first 1 second',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(x_baseline[0]):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(x_baseline[0]):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(x_fault[0]):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(x_fault[0]):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)
plt.subplot(5,2,3)
sns.distplot(x_baseline[0][0:5000], label='Baseline')
sns.distplot(x_fault[0][0:5000],label='Faulty')
plt.xlim([-5,5])
plt.ylabel('Population')
plt.title('Distribution of first 5000 samples',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(x_baseline[0][0:5000]):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(x_baseline[0][0:5000]):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(x_fault[0][0:5000]):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(x_fault[0][0:5000]):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)
plt.subplot(5,2,4)
sns.distplot(x_baseline[0][0:1000], label='Baseline')
sns.distplot(x_fault[0][0:1000],label='Faulty')
plt.ylabel('Population')
plt.xlim([-5,5])
plt.title('Distribution of first 1000 samples',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(x_baseline[0][0:1000]):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(x_baseline[0][0:1000]):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(x_fault[0][0:1000]):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(x_fault[0][0:1000]):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)
plt.subplot(5,2,5)
time = np.arange(0,40,1/f_sample)
N = len(time)
plt.ylim([-20,20])
plt.title('Complete signal data',size=20)
plt.plot(time, x_baseline.ravel(),label='Baseline')
plt.plot(time[0:int(N/4)], x_fault.ravel(),label='Fault')
plt.xlabel('Time')
plt.legend(frameon=False, loc=1)
plt.subplot(5,2,6)
plt.title('Signal data for the first 1 second',size=20)
plt.plot(time[0:int(N/40)], x_baseline[0],label='Baseline')
plt.plot(time[0:int(N/40)], x_fault[0],label='Fault')
plt.ylim([-20,20])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)
plt.subplot(5,2,7)
plt.title('Signal for first 5000 samples',size=20)
plt.plot(time[0:5000], x_baseline[0][0:5000],label='Baseline')
plt.plot(time[0:5000], x_fault[0][0:5000],label='Fault')
plt.ylim([-16,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)
plt.subplot(5,2,8)
plt.title('Signal for first 1000 samples',size=20)
plt.plot(time[0:1000], x_baseline[0][0:1000],label='Baseline')
plt.plot(time[0:1000], x_fault[0][0:1000],label='Fault')
plt.ylim([-16,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)
plt.subplot(5,2,9)
plt.title('Signal for first 500 samples',size=20)
plt.plot(time[0:500], x_baseline[0][0:500],label='Baseline')
plt.plot(time[0:500], x_fault[0][0:500],label='Fault')
plt.ylim([-16,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=2)
plt.subplot(5,2,10)
plt.title('Signal for first 100 samples',size=20)
plt.plot(time[0:100], x_baseline[0][0:100],label='Baseline')
plt.plot(time[0:100], x_fault[0][0:100],label='Fault')
plt.ylim([-10,10])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1);
plt.subplots(figsize=(15,7))
plt.subplot(1,2,1)
stats.probplot(x_baseline.ravel(), dist="norm",plot=plt)
plt.title('Baseline',size=25);
plt.subplot(1,2,2)
stats.probplot(x_fault.ravel(),dist="norm", plot=plt)
plt.title('Faulty',size=25);
df=pd.concat([pd.Series(x_baseline.ravel()),pd.Series(x_fault.ravel())],axis=1)
df.columns = ['Baseline','Faulty']
plt.figure(figsize=(7,15))
plt.xticks(size=20)
plt.yticks(size=20)
sns.boxplot(data=df);
fig = make_subplots(rows=3, cols=1, subplot_titles=['Baseline', 'Faulty', 'Faulty'], shared_yaxes=True)
freqs = np.fft.fftfreq(len(x_baseline[0]), 1/f_sample)
idx = np.argsort(freqs)
ps = ((2/x_baseline[0].size)*np.abs(np.fft.fft(x_baseline[0])))
fig.add_trace(go.Scatter(x=np.abs(freqs[idx]), y=ps[idx], name = 'Baseline'),
row=1, col=1)
freqs = np.fft.fftfreq(len(x_fault[0]), 1/f_sample)
idx = np.argsort(freqs)
ps = ((2/x_fault[0].size)*np.abs(np.fft.fft(x_fault[0])))
fig.add_trace(go.Scatter(x=np.abs(freqs[idx]), y=ps[idx], name = 'Faulty'),
row=2, col=1)
fig.add_trace(go.Scatter(x=np.abs(freqs[idx]/f_bpfo), y=ps[idx], name = 'Faulty'),
row=3, col=1)
fig.update_xaxes(title_text="Frequency (Hz)", row=1, col=1)
fig.update_xaxes(title_text="Frequency (Hz)", row=2, col=1)
fig.update_xaxes(title_text="F_bpfo Multiples", row=3, col=1)
fig.update_yaxes(title_text="Amplitude", row=1, col=1)
fig.update_yaxes(title_text="Amplitude", row=2, col=1)
fig.update_yaxes(title_text="Amplitude", row=3, col=1)
fig.update_layout(title_text="Vibration Spectrum : Baseline vs Faulty", height=700)
fig.show()
analytic_signal_base = hilbert(x_baseline.ravel())
envelope_base = np.abs(analytic_signal_base.ravel())
analytic_signal_fault = hilbert(x_fault.ravel())
envelope_fault = np.abs(analytic_signal_fault.ravel())
fig,ax = plt.subplots(figsize=(16,12))
fig.subplots_adjust(hspace=0.5)
plt.subplot(2,2,1)
sns.distplot(envelope_base, label='Baseline')
sns.distplot(envelope_fault,label='Faulty')
plt.xlim([-5,5])
plt.ylabel('Population')
plt.title('Distribution of Complete Data',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(envelope_base):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(envelope_base):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(envelope_fault):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(envelope_fault):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)
plt.subplot(2,2,2)
plt.title('Complete envelope data',size=20)
plt.plot(time, envelope_base,label='Baseline')
plt.plot(time[0:len(time)//4], envelope_fault,label='Fault')
plt.ylim([0,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)
plt.subplot(2,2,3)
sns.distplot(envelope_base[0:len(envelope_base)//40], label='Baseline')
sns.distplot(envelope_fault[0:len(envelope_fault)//10],label='Faulty')
plt.xlim([-5,5])
plt.ylabel('Population')
plt.title('Distribution of first 1 second',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(envelope_base[0:len(envelope_base)//40]):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(envelope_base[0:len(envelope_base)//40]):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(envelope_fault[0:len(envelope_fault)//10]):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(envelope_fault[0:len(envelope_fault)//10]):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)
plt.subplot(2,2,4)
plt.title('Envelope Signal for 1 second',size=20)
plt.plot(time[0:len(time)//40], envelope_base[0:len(envelope_base)//40],label='Baseline')
plt.plot(time[0:len(time)//40], envelope_fault[0:len(envelope_fault)//10],label='Fault')
plt.ylim([0,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)
fig = make_subplots(rows=3, cols=1, subplot_titles=['Baseline', 'Faulty'], shared_yaxes=True)
#fft baseline
freqs = np.fft.fftfreq(len(envelope_base), 1/f_sample)
idx = np.argsort(freqs)
ps = np.abs(np.fft.fft(envelope_base))**2
fig.add_trace(go.Scatter(x=np.abs(freqs[idx]), y=ps[idx], name = 'Baseline'),
row=1, col=1)
#fft faulty
freqs = np.fft.fftfreq(len(envelope_fault), 1/f_sample)
idx = np.argsort(freqs)
ps = np.abs(np.fft.fft(envelope_fault))**2
fig.add_trace(go.Scatter(x=np.abs(freqs[idx]), y=ps[idx], name = 'Faulty'),
row=2, col=1)
fig.add_trace(go.Scatter(x=np.abs(freqs[idx]/f_bpfo), y=ps[idx], name = 'Faulty'),
row=3, col=1)
fig.update_xaxes(title_text="Frequency (Hz)", range=[0, 500], row=1, col=1)
fig.update_xaxes(title_text="Frequency (Hz)", range=[0, 500], row=2, col=1)
fig.update_xaxes(title_text="F_bpfo Multiples", range=[0, 5], row=3, col=1)
fig.update_yaxes(title_text="Power",range=[0, 40e8], row=1, col=1)
fig.update_yaxes(title_text="Power",range=[0, 40e8], row=2, col=1)
fig.update_yaxes(title_text="Power",range=[0, 40e8], row=3, col=1)
fig.update_layout(title_text="Vibration Spectrum : Baseline vs Faulty", height=700,annotations=[
dict(
x=103.1,
y=1.84e9,
xref="x2",
yref="y2",
text="F_bpfo x 1",
showarrow=True,
arrowhead=7,
ax=20,
ay=-40
)])
fig.show()